#Trying a cluster analysis for BS_MBD stuff
#load libraries
library(vegan)
library(permute)
library(cluster)
library(pvclust)
library(simba)
#To reduce the size of the data, I filtered to only the CGs annotated as exons and then removed duplicates (~4000, chosen at random): some CGs have duplicate annotations because genes run in both directions on the strand.
#Removing them at random is obviously not what I ultimately want to do, but I'm just practicing.
# Load the exon-only CG table; row.names = 1 takes the row names from the
# first column of the CSV.
CGexons <- read.csv(
  file = "CG 500k master list exons only.csv",
  header = TRUE,
  row.names = 1
)

### Compute the dissimilarity/distance matrix
# No standardization or normalization is applied by hand here, even though
# e.g. % methylation and gene length are on different scales.
# NOTE(review): per the cluster::daisy documentation, metric = "gower"
# rescales each numeric column by its range, so the columns are not compared
# on their raw scales -- confirm this is the intended behavior.
CGexons.gower <- daisy(x = CGexons, metric = "gower")
# NOTE: daisy() warned that the dummy variables are being treated as
# "interval scaled". Still to figure out: whether these need to be changed to
# 'factor' variables or something similar (daisy() also has a `type` argument
# for declaring binary columns -- TODO confirm which is appropriate).

### Compute hierarchical clustering
# Hierarchical agglomerative clustering on the dissimilarities, using
# "complete" linkage.
CGexonscl.com <- hclust(d = CGexons.gower, method = "complete")